Loading packages and data ———————————————–

library(dplyr)
library(scater)
library(SeuratDisk)
library(Seurat)
library(ggplot2)
library(RColorBrewer)
library(readr)

Loading in the h5seurat files

real_validation <- LoadH5Seurat("68KPBMC_real_7kTest.h5seurat") # real data

scgan_validation <- LoadH5Seurat("68kPBMC_scGANgenerated_7ktesting.h5seurat") # scGAN data

activa_validation <- LoadH5Seurat("68kPBMC-6991Generated_10RecLoss_Epoch600.h5seurat") # ACTIVA Data

# Changing the barcodes in ACTIVA validation set to be the same as the real validation set
rownames(activa_validation@meta.data) <- rownames(real_validation@meta.data)

Adding Cell Type Annotations ——————————————–

# Reading in original dataset labels
ann68kpbmc <- read_tsv("https://raw.githubusercontent.com/10XGenomics/single-cell-3prime-paper/master/pbmc68k_analysis/68k_pbmc_barcodes_annotation.tsv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   TSNE.1 = col_double(),
##   TSNE.2 = col_double(),
##   barcodes = col_character(),
##   celltype = col_character()
## )
# Real data cell labels
real_validation@meta.data # taking a quick look

real_val_barcodes <- real_validation@meta.data %>%
  tibble::rownames_to_column(var = "Barcodes") %>%
  pull(Barcodes)

real_validation_celltype <- ann68kpbmc %>%
  filter(barcodes %in% real_val_barcodes)

# table(rownames(real_validation@meta.data) == real_validation_celltype$barcodes)

# ACTIVA validation cell labels
activa_validation@meta.data

activa_val_barcodes <- activa_validation@meta.data %>%
  tibble::rownames_to_column(var = "Barcodes") %>%
  pull(Barcodes)

activa_validation_celltype <- ann68kpbmc %>%
  filter(barcodes %in% activa_val_barcodes)

# table(rownames(activa_validation@meta.data) == activa_validation_celltype$barcodes)

# scGAN Validation Cell Labels
scgan_validation@meta.data

scgan_val_barcodes <- scgan_validation@meta.data %>%
  tibble::rownames_to_column(var = "Barcodes") %>%
  pull(Barcodes)

scgan_validation_celltype <- ann68kpbmc %>%
  filter(barcodes %in% scgan_val_barcodes)

# table(rownames(scgan_validation@meta.data) == scgan_validation_celltype$barcodes)

# Generate dataset labels for the metadata
real_labs <- rep("Real (Test Set)", nrow(real_validation@meta.data))
activa_labs <- rep("ACTIVA", nrow(activa_validation@meta.data))
scgan_labs <- rep("scGAN", nrow(scgan_validation@meta.data))

## Real data
real_validation <- AddMetaData(real_validation,
                               metadata = real_labs,
                               col.name = "dataset_label") # adding dataset label to metadata
real_validation <- AddMetaData(real_validation,
                               metadata = real_validation_celltype$celltype,
                               col.name = "celltype") # Adding Celltype to metadata
## scGAN data
scgan_validation <- AddMetaData(scgan_validation,
                                metadata = scgan_labs,
                                col.name = "dataset_label") # adding dataset label to metadata
scgan_validation <- AddMetaData(scgan_validation,
                                metadata = scgan_validation_celltype$celltype,
                                col.name = "celltype") # Adding Celltype to metadata
## ACTIVA data
activa_validation <- AddMetaData(activa_validation,
                                 metadata = activa_labs,
                                 col.name = "dataset_label") # adding dataset label to metadata
activa_validation <- AddMetaData(activa_validation,
                                 metadata = activa_validation_celltype$celltype,
                                 col.name = "celltype") # Adding Celltype to metadata

Dataset Integration —————————————————–

Variable Gene Selection

# 3000 highly variable genes identified using variance stabilizing transformation - vst
real_validation <- FindVariableFeatures(object = real_validation,
                                        nfeatures = 3000,
                                        selection.method = "vst")

scgan_validation <- FindVariableFeatures(object = scgan_validation,
                                         nfeatures = 3000,
                                         selection.method = "vst")

activa_validation <- FindVariableFeatures(object = activa_validation,
                                          nfeatures = 3000,
                                          selection.method = "vst")

Integration

val_list <- list(real_validation,
                 scgan_validation, activa_validation)
# Integration
validation.anchors <-
  FindIntegrationAnchors(object.list = val_list,
                         dims = 1:30,
                         anchor.features = 3000)
## Warning in CheckDuplicateCellNames(object.list = object.list): Some cell names
## are duplicated across objects provided. Renaming to enforce unique cell names.
## Computing 3000 integration features
## Scaling features for provided objects
## Finding all pairwise anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 20252 anchors
## Filtering anchors
##  Retained 4896 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 19407 anchors
## Filtering anchors
##  Retained 3834 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 19766 anchors
## Filtering anchors
##  Retained 3483 anchors
validation.int <- IntegrateData(anchorset = validation.anchors,
                                dims = 1:30)
## Merging dataset 2 into 1
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 3 into 1 2
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Warning: Adding a command log without an assay associated with it
DefaultAssay(validation.int) <- "integrated"

Scaling, Dimensional reduction, Clustering ——————————

Scaling and PCA

# Scaling
validation.int <- ScaleData(validation.int)
## Centering and scaling data matrix
# Dim reduction w/PCA
validation.int <- RunPCA(validation.int)
## PC_ 1 
## Positive:  MALAT1, B2M, CD3D, LDHB, BTG1, NPM1, TMEM66, EEF1B2, HNRNPA1, CD27 
##     GLTSCR2, GIMAP7, PTPRCAP, LTB, CD7, RPLP0, RPS20, IL7R, IL32, EEF1D 
##     LEF1, RPL34, CCR7, NACA, JUNB, GZMM, CD52, PACSIN3, RPL35A, TCF7 
## Negative:  AIF1, CST3, FTL, LST1, TYROBP, FCN1, SPI1, PSAP, FTH1, SERPINA1 
##     TMEM176B, TYMP, SAT1, CFD, S100A11, HLA-DRB1, S100A9, FCER1G, CTSS, IFI30 
##     CFP, HLA-DRB5, LYZ, PYCARD, HCK, S100A4, CD68, PILRA, RP11-290F20.3, GSTP1 
## PC_ 2 
## Positive:  RPL34, RPLP1, RPL35A, LTB, RPL36, RPS28, HLA-DRA, RPLP0, RPL35, CD74 
##     HLA-DRB1, RPS20, FAU, JUNB, RPS11, CD79A, IFI27, PABPC1, RPL37A, RPL37 
##     LY86, LDHB, UBA52, ANKRD20A1, ZNF507, HLA-DPB1, HLA-DRB5, HLA-DQA2, HLA-DQB1, CD79B 
## Negative:  NKG7, GNLY, B2M, CST7, CCL5, GZMB, ACTB, GZMA, TMSB4X, FGFBP2 
##     SH3BGRL3, CTSW, PRF1, FERMT3, PPBP, SRGN, TPST2, PF4, CD99, MYL12A 
##     GNG11, CLU, HLA-A, CTSA, TREML1, F13A1, HOPX, TAGLN2, RUFY1, PLEK 
## PC_ 3 
## Positive:  NKG7, GNLY, CST7, GZMA, GZMB, FGFBP2, PRF1, CTSW, HOPX, GZMH 
##     SPON2, CLIC3, CCL5, HLA-A, HCST, FCGR3A, PFN1, CD63, GZMM, B2M 
##     PRSS23, IFITM2, MATK, CYBA, CD247, ITGB2, CD7, GPR56, IGFBP7, TYROBP 
## Negative:  RGS10, GPX1, PPBP, GNG11, PF4, CLU, LTB, F13A1, TUBA4A, TREML1 
##     NRGN, CD9, SPARC, AC147651.3, GAS2L1, ITGA2B, RUFY1, CTSA, TAGLN2, TUBB1 
##     GP9, CLEC1B, PKM, FERMT3, SDPR, C6orf25, ACRBP, SPINT2, GRHL1, MYL9 
## PC_ 4 
## Positive:  LDHB, S100A4, S100A6, S100A9, S100A8, CD3D, GIMAP7, AIF1, S100A11, FTL 
##     TMEM176B, CD27, FCN1, CFD, IL7R, TSPO, LST1, TMEM66, TYMP, SERPINA1 
##     CD14, JUNB, LEF1, TIMP1, RP11-290F20.3, MAL, PSAP, RPL34, TMSB4X, LAT 
## Negative:  CD74, HLA-DRA, CD79A, CD79B, MS4A1, HLA-DPA1, HLA-DPB1, TCL1A, HLA-DQA1, HLA-DQA2 
##     HLA-DRB1, HLA-DQB1, HLA-DMB, HLA-DRB5, CD37, SPIB, VPREB3, CD22, LINC00926, PPAPDC1B 
##     HLA-DMA, HVCN1, BANK1, CD40, FCER2, CYB561A3, CD19, CD72, HLA-DOB, IRF8 
## PC_ 5 
## Positive:  LRRC26, SCT, DERL3, SERPINF1, S100A8, ITM2C, MZB1, S100A9, IRF7, SEC61B 
##     LILRA4, PLD4, MYCL, RPPH1, MS4A6A, PTCRA, PTGDS, GPX1, CD14, ALOX5AP 
##     MYBL2, RPLP1, DNASE1L3, SSR4, CYBA, SMPD3, PACSIN1, SERF2, TPM2, TMEM8B 
## Negative:  MS4A7, RP11-290F20.3, FCGR3A, LRRC25, PILRA, LST1, HMOX1, VMO1, SERPINA1, COTL1 
##     CTSS, IFITM3, HLA-DPA1, HES4, FAM26F, HCK, AIF1, CFD, FTH1, SPI1 
##     ACTB, IFI30, C19orf38, C1QA, SAT1, FCER1G, RHOC, TESC, LYPD2, GPBAR1
ElbowPlot(validation.int, ndims = 40)

Jackstraw plot

# Jackstraw plot
# Takes a bit of time to run
validation.int <- JackStraw(validation.int,
                            dims = 50)
validation.int <- ScoreJackStraw(validation.int, dims = 1:50)
JackStrawPlot(validation.int, dims = 1:50)

Clustering

# Clustering
usepcs_int <- 1:50
validation.int <- FindNeighbors(object = validation.int,
                                dims = usepcs_int)
## Computing nearest neighbor graph
## Computing SNN
validation.int <- FindClusters(
  object = validation.int,
  resolution = seq(0.20,2,0.10)
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20973
## Number of edges: 1713744
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9186
## Number of communities: 13
## Elapsed time: 6 seconds
## 4 singletons identified. 9 final clusters.
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20973
## Number of edges: 1713744
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8971
## Number of communities: 15
## Elapsed time: 6 seconds
## 4 singletons identified. 11 final clusters.
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20973
## Number of edges: 1713744
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8812
## Number of communities: 17
## Elapsed time: 5 seconds
## 4 singletons identified. 13 final clusters.
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20973
## Number of edges: 1713744
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8660
## Number of communities: 18
## Elapsed time: 7 seconds
## 4 singletons identified. 14 final clusters.
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20973
## Number of edges: 1713744
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8511
## Number of communities: 19
## Elapsed time: 5 seconds
## 4 singletons identified. 15 final clusters.
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20973
## Number of edges: 1713744
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8365
## Number of communities: 19
## Elapsed time: 5 seconds
## 4 singletons identified. 15 final clusters.
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20973
## Number of edges: 1713744
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8220
## Number of communities: 20
## Elapsed time: 5 seconds
## 4 singletons identified. 16 final clusters.
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20973
## Number of edges: 1713744
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8078
## Number of communities: 21
## Elapsed time: 5 seconds
## 4 singletons identified. 17 final clusters.
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20973
## Number of edges: 1713744
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7937
## Number of communities: 21
## Elapsed time: 4 seconds
## 4 singletons identified. 17 final clusters.
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20973
## Number of edges: 1713744
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7796
## Number of communities: 21
## Elapsed time: 5 seconds
## 4 singletons identified. 17 final clusters.
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20973
## Number of edges: 1713744
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7658
## Number of communities: 21
## Elapsed time: 6 seconds
## 4 singletons identified. 17 final clusters.
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20973
## Number of edges: 1713744
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7521
## Number of communities: 22
## Elapsed time: 6 seconds
## 4 singletons identified. 18 final clusters.
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20973
## Number of edges: 1713744
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7386
## Number of communities: 23
## Elapsed time: 6 seconds
## 4 singletons identified. 19 final clusters.
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20973
## Number of edges: 1713744
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7263
## Number of communities: 23
## Elapsed time: 5 seconds
## 4 singletons identified. 19 final clusters.
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20973
## Number of edges: 1713744
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7146
## Number of communities: 23
## Elapsed time: 6 seconds
## 4 singletons identified. 19 final clusters.
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20973
## Number of edges: 1713744
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7039
## Number of communities: 26
## Elapsed time: 5 seconds
## 4 singletons identified. 22 final clusters.
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20973
## Number of edges: 1713744
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6958
## Number of communities: 25
## Elapsed time: 7 seconds
## 4 singletons identified. 21 final clusters.
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20973
## Number of edges: 1713744
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6871
## Number of communities: 25
## Elapsed time: 7 seconds
## 4 singletons identified. 21 final clusters.
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20973
## Number of edges: 1713744
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6790
## Number of communities: 25
## Elapsed time: 5 seconds
## 4 singletons identified. 21 final clusters.
grep("res", colnames(validation.int@meta.data), value = TRUE) %>%
  purrr::map_chr( ~ paste(.x, "--> clusters generated:", length(unique(
    validation.int@meta.data[, .x]
  ))))
##  [1] "integrated_snn_res.0.2 --> clusters generated: 9" 
##  [2] "integrated_snn_res.0.3 --> clusters generated: 11"
##  [3] "integrated_snn_res.0.4 --> clusters generated: 13"
##  [4] "integrated_snn_res.0.5 --> clusters generated: 14"
##  [5] "integrated_snn_res.0.6 --> clusters generated: 15"
##  [6] "integrated_snn_res.0.7 --> clusters generated: 15"
##  [7] "integrated_snn_res.0.8 --> clusters generated: 16"
##  [8] "integrated_snn_res.0.9 --> clusters generated: 17"
##  [9] "integrated_snn_res.1 --> clusters generated: 17"  
## [10] "integrated_snn_res.1.1 --> clusters generated: 17"
## [11] "integrated_snn_res.1.2 --> clusters generated: 17"
## [12] "integrated_snn_res.1.3 --> clusters generated: 18"
## [13] "integrated_snn_res.1.4 --> clusters generated: 19"
## [14] "integrated_snn_res.1.5 --> clusters generated: 19"
## [15] "integrated_snn_res.1.6 --> clusters generated: 19"
## [16] "integrated_snn_res.1.7 --> clusters generated: 22"
## [17] "integrated_snn_res.1.8 --> clusters generated: 21"
## [18] "integrated_snn_res.1.9 --> clusters generated: 21"
## [19] "integrated_snn_res.2 --> clusters generated: 21"
Idents(validation.int) <- "integrated_snn_res.0.3"

Dimension reduction

# Dimension reduction
validation.int <- RunTSNE(object = validation.int, 
                          reduction.use = "pca", 
                          dims = usepcs_int,
                          do.fast = TRUE)

validation.int <- RunUMAP(validation.int, 
                          reduction = "pca", 
                          dims = usepcs_int)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 11:35:07 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:35:07 Read 20973 rows and found 50 numeric columns
## 11:35:07 Using Annoy for neighbor search, n_neighbors = 30
## 11:35:07 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:35:10 Writing NN index file to temp file /var/folders/r9/z4byyk895zbc4qs07n05cph80000gn/T//RtmpNWmiY2/filea8e6ca44160
## 11:35:10 Searching Annoy index using 1 thread, search_k = 3000
## 11:35:18 Annoy recall = 100%
## 11:35:19 Commencing smooth kNN distance calibration using 1 thread
## 11:35:21 Initializing from normalized Laplacian + noise
## 11:35:22 Commencing optimization for 200 epochs, with 1048706 positive edges
## 11:35:35 Optimization finished

Saving object

# Saving the datasets in different formats for interoperability with python packages
save(validation.int, file = "data/final_68kpbmc_val_int_clust.RData")
SaveH5Seurat(validation.int,
             filename = "final_68kpbmc_val_int_clust.h5Seurat",
             overwrite = FALSE)
Convert("final_68kpbmc_val_int_clust.h5Seurat", dest = "h5ad")

Visualization with scater ———————————————–

Prep for plotting

# scater is a toolkit like Seurat but uses a different data structure (Single Cell Experiment (SCE))

# First will use seurat to generate the SCE data by calling the as.SingleCellExperiment() on our Seurat object

# Removing negative expression values before converting to SCE object
cleaned_CD79A <- WhichCells(validation.int,
                            expression = CD79A >= 0)
validation.int <- subset(validation.int, cells = cleaned_CD79A)
cleaned_MS4A1 <- WhichCells(validation.int,
                            expression = MS4A1 >= 0)
validation.int <- subset(validation.int, cells = cleaned_MS4A1)

# Converting the seurat object into a SCE object
val_int_sce <- as.SingleCellExperiment(validation.int)
names(assays(val_int_sce)) <- "counts" # making sure assay name is correct for expression values

# Creating an object "logcounts" which houses log2 transformed count values
logcounts(val_int_sce) <- log2(counts(val_int_sce) + 1)
## Warning in eval(call, parent.frame()): NaNs produced
dim(logcounts(val_int_sce))
## [1]  3000 18318

Plotting w/scater

p_clusters <-
  plotUMAP(val_int_sce, colour_by = "ident", other_fields = "dataset_label") +
  facet_wrap( ~ dataset_label) +
  theme(legend.title = element_blank(),
        strip.text.x = element_text(face = "bold"))
p_clusters
Clusters

Clusters

ggsave(
  filename = "plots/integrated/pbmc68k_final_validation_scater_umap.png",
  dpi = 320,
  units = "in",
  width = 8,
  height = 4
)
p_CD79A <- plotExpression(
  val_int_sce,
  features = c("CD79A"),
  colour_by = "ident",
  x = "ident",
  other_fields = "dataset_label",
  exprs_values = "logcounts"
) +
  facet_wrap(~ dataset_label) +
  xlab("Cluster") +
  ggtitle("B-Cell Marker: CD79A") +
  theme(legend.title = element_blank(),
        strip.text.x = element_text(face = "bold"))
p_CD79A
Violin Plots

Violin Plots

ggsave(
  filename = "plots/integrated/pbmc68k_final_validation_scater_violin_bcell_CD79A_RC.png",
  dpi = 320,
  units = "in",
  width = 8,
  height = 4
)

p_MS4A1 <- plotExpression(
  val_int_sce,
  features = c("MS4A1"),
  colour_by = "ident",
  x = "ident",
  other_fields = "dataset_label",
  exprs_values = "logcounts"
) +
  facet_wrap( ~ dataset_label) +
  ggtitle("B-Cell Marker: MS4A1") +
  xlab("Cluster") +
  theme(legend.title = element_blank(),
        strip.text.x = element_text(face = "bold"))
p_MS4A1
Violin Plots

Violin Plots

ggsave(
  filename = "plots/integrated/pbmc68k_final_validation_scater_violin_bcell_MS4A1_logcounts.png",
  dpi = 320,
  units = "in",
  width = 8,
  height = 4
)
p_density_CD79A <-
  plotUMAP(val_int_sce, colour_by = "CD79A", other_fields = "dataset_label") +
  facet_wrap( ~ dataset_label) +
  scale_fill_viridis_c(option = "B") +
  ggtitle("B-Cell Marker: CD79A") +
  theme(legend.title = element_blank(),
        strip.text.x = element_text(face = "bold"))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
p_density_CD79A
Expression Plot

Expression Plot

ggsave(
  filename = "plots/integrated/pbmc68k_final_validation_scater_UMAPexpression_bcell_CD79A.png",
  dpi = 320,
  units = "in",
  width = 8,
  height = 4
)

p_density_MS4A1 <-
  plotUMAP(val_int_sce, colour_by = "MS4A1", other_fields = "dataset_label") +
  facet_wrap( ~ dataset_label) +
  scale_fill_viridis_c(option = "B") +
  ggtitle("B-Cell Marker: MS4A1") +
  theme(legend.title = element_blank(),
        strip.text.x = element_text(face = "bold"))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
p_density_MS4A1
Expression Plot

Expression Plot

ggsave(
  filename = "plots/integrated/pbmc68k_final_validation_scater_UMAPexpression_bcell_MS4A1.png",
  dpi = 320,
  units = "in",
  width = 8,
  height = 4
)